## Programmer: Vito D'Orazio
## calc_dist_111914.r
##
## inputs: select_sequences_111914.RData
## outputs: seq_analysis_111914.dta

## Can toggle for UCDP data


rm(list=ls())
#load("all_sequences.RData")

setwd("/Users/vjdorazio/Desktop/Sequence_Analysis_paper/replication_data")

# Toggle these for gtds or ucdp distances
#GTDS
load("select_sequences_111914.RData")
outfilename <- "seq_analysis_111914.dta"

#UCDP
#load("select_sequences_ucdp_111914.RData")
#outfilename <- "seq_analysis_ucdp_111914.dta"


library(fields)
library(foreign)
library(entropy)
library(energy)

## FUNCTIONS FOR CALCULATING THE DISTANCE BETWEEN VECTORS

# A function whose input is two vectors of equal length and
# whose output is the Euclidian distance between them
calcEucDist <- function(x,y) {
  a <- 0
  x <- as.numeric(x)
  y <- as.numeric(y)
  if(isTRUE(length(x)==length(y))) { #prob unnecessary check
    a <- x-y
    a <- sum(a*a)
  }
  return (sqrt(a))
}

calcAbsEucDist <- function(x,y) {
  a <- 0
  x <- as.numeric(x)
  y <- as.numeric(y)
  if(isTRUE(length(x)==length(y))) { #prob unnecessary check
    a <- x-y
    a <- sum(abs(a))
  }
  return (a)
}

# A function whose input is two vector of equal length and
# whose output is the Levenshtein distance between them
## Special Thanks to Rajarshi Guha http://rguha.net/code/R/
calcLevDist <- function(s,t) {
 #   s <- strsplit(s,'')[[1]] # already in this form
 #   t <- strsplit(t,'')[[1]]

    n <- length(s) #n and m should be 13
    m <- length(t)

    if (n == 0) { #prob unnecessary check
        return (m)
    } else if (m == 0) {
        return (n)
    }

    d <- matrix(0,nrow=m+1, ncol=n+1) #14x14 matrix
    d[1,] <- 0:n #coding the first row and col the sequence 0:13
    d[,1] <- 0:m

    for (i in 2:(n+1)) { #ext loop 2:14
        for (j in 2:(m+1)) { #inner loop 2:14
            cost = 1

            if (s[i-1] == t[j-1]) { #if equal
                cost = 0
            } else { #if unequal
                cost = abs(as.numeric(s[i-1])-as.numeric(t[j-1]))
            }

            d[j,i] = min(c(d[j-1,i]+1,d[j,i-1]+1,d[j-1,i-1]+cost))
        }
    }
    return(d[m+1,n+1])
}

calcDCOR <- function(x,y) {
    return(dcor(x,y))
}

## routine for Linfoot's normalization of mutual information
normalMI <- function(x,y) {
    d <- rbind(as.numeric(x),as.numeric(y))
    d <- d+1
    mi <- mi.plugin(d)
    return(sqrt(1-exp(-2*mi)))
}



## SECTION 3.a ## SECTION 3.a ## SECTION 3.a ####
#################################################
## CALCULATING DISTANCES CALCULATING DISTANCES ##

## no lag
# the number of combinations of 2
n <- choose(count.seq,2)
n.obs <- 1

final.data <- matrix(data=NA,nrow=n,ncol=4)
final.dists <- matrix(0,nrow=n,ncol=32)
skip <- matrix(FALSE,nrow=count.seq,ncol=count.seq)
diag(skip) <- TRUE # never calc distances on the diag

for(k in 1:count.seq) {
  for(l in 1:count.seq) {
    if(skip[k,l] | skip[l,k]) next # sequences are non-directed
    else {
        skip[k,l] <- TRUE
        skip[l,k] <- TRUE
    }

    temp.stateWeek.seq1 <- paste(history[2,1,k],substr(history[1,1,k],3,7),sep="-")
    temp.stateWeek.seq2 <- paste(history[2,1,l],substr(history[1,1,l],3,7),sep="-")
    temp.othercrises <- as.numeric(history[12,1,k]) + as.numeric(history[12,1,l])
    temp.type.dyad <- 1 # seq1 is only conflict
    if(isTRUE(as.numeric(history[11,1,k]==1) & as.numeric(history[11,1,l]==1)))  # both conflict
        temp.type.dyad <- 0
    if(isTRUE(as.numeric(history[11,1,k])==0 & as.numeric(history[11,1,l]==0)))  # both peace
        temp.type.dyad <- 3
    if(isTRUE(as.numeric(history[11,1,k])==0 & as.numeric(history[11,1,l]==1)))  # seq2 is only conflict
        temp.type.dyad <- 2
    final.data[n.obs,] <- c(temp.stateWeek.seq1,temp.stateWeek.seq2,temp.type.dyad, temp.othercrises)

    euc.A.vcoop <- calcEucDist(history[3,,k],history[3,,l])
    euc.A.mcoop <- calcEucDist(history[4,,k],history[4,,l])
    euc.A.vconfl <- calcEucDist(history[5,,k],history[5,,l])
    euc.A.mconfl <- calcEucDist(history[6,,k],history[6,,l])

    euc.B.vcoop <- calcEucDist(history[7,,k],history[7,,l])
    euc.B.mcoop <- calcEucDist(history[8,,k],history[8,,l])
    euc.B.vconfl <- calcEucDist(history[9,,k],history[9,,l])
    euc.B.mconfl <- calcEucDist(history[10,,k],history[10,,l])

    euc.abs.A.vcoop <- calcAbsEucDist(history[3,,k],history[3,,l])
    euc.abs.A.mcoop <- calcAbsEucDist(history[4,,k],history[4,,l])
    euc.abs.A.vconfl <- calcAbsEucDist(history[5,,k],history[5,,l])
    euc.abs.A.mconfl <- calcAbsEucDist(history[6,,k],history[6,,l])

    euc.abs.B.vcoop <- calcAbsEucDist(history[7,,k],history[7,,l])
    euc.abs.B.mcoop <- calcAbsEucDist(history[8,,k],history[8,,l])
    euc.abs.B.vconfl <- calcAbsEucDist(history[9,,k],history[9,,l])
    euc.abs.B.mconfl <- calcAbsEucDist(history[10,,k],history[10,,l])

    lev.A.vcoop <- calcLevDist(history[3,,k],history[3,,l])
    lev.A.mcoop <- calcLevDist(history[4,,k],history[4,,l])
    lev.A.vconfl <- calcLevDist(history[5,,k],history[5,,l])
    lev.A.mconfl <- calcLevDist(history[6,,k],history[6,,l])

    lev.B.vcoop <- calcLevDist(history[7,,k],history[7,,l])
    lev.B.mcoop <- calcLevDist(history[8,,k],history[8,,l])
    lev.B.vconfl <- calcLevDist(history[9,,k],history[9,,l])
    lev.B.mconfl <- calcLevDist(history[10,,k],history[10,,l])
    
    mi.A.vcoop <- normalMI(history[3,,k],history[3,,l])
    mi.A.mcoop <- normalMI(history[4,,k],history[4,,l])
    mi.A.vconfl <- normalMI(history[5,,k],history[5,,l])
    mi.A.mconfl <- normalMI(history[6,,k],history[6,,l])
    
    mi.B.vcoop <- normalMI(history[7,,k],history[7,,l])
    mi.B.mcoop <- normalMI(history[8,,k],history[8,,l])
    mi.B.vconfl <- normalMI(history[9,,k],history[9,,l])
    mi.B.mconfl <- normalMI(history[10,,k],history[10,,l])

    final.dists[n.obs,] <- c(euc.A.vcoop,euc.A.mcoop,euc.A.vconfl,euc.A.mconfl,
                            euc.B.vcoop,euc.B.mcoop,euc.B.vconfl,euc.B.mconfl,
                            euc.abs.A.vcoop,euc.abs.A.mcoop,euc.abs.A.vconfl,euc.abs.A.mconfl,
                            euc.abs.B.vcoop,euc.abs.B.mcoop,euc.abs.B.vconfl,euc.abs.B.mconfl,
                            lev.A.vcoop,lev.A.mcoop,lev.A.vconfl,lev.A.mconfl,
                            lev.B.vcoop,lev.B.mcoop,lev.B.vconfl,lev.B.mconfl,
                            mi.A.vcoop,mi.A.mcoop,mi.A.vconfl,mi.A.mconfl,
                            mi.B.vcoop,mi.B.mcoop,mi.B.vconfl,mi.B.mconfl)

    n.obs <- n.obs+1
    cat(n.obs,"\n")
    print(n.obs)
    cat("\n")
  }
}

output.data <- as.data.frame(cbind(final.data,final.dists),stringsAsFactors=FALSE)

colnames(output.data) <- c("State_Week_1","State_Week_2","Dyad_Type","Other_Crises","Euc_A_Verb_Coop","Euc_A_Mater_Coop","Euc_A_Verb_Confl","Euc_A_Mater_Confl",
                            "Euc_B_Verb_Coop","Euc_B_Mater_Coop","Euc_B_Verb_Confl","Euc_B_Mater_Confl",
                            "Euc_Abs_A_Verb_Coop","Euc_Abs_A_Mater_Coop","Euc_Abs_A_Verb_Confl","Euc_Abs_A_Mater_Confl",
                            "Euc_Abs_B_Verb_Coop","Euc_Abs_B_Mater_Coop","Euc_Abs_B_Verb_Confl","Euc_Abs_B_Mater_Confl",
                            "Lev_A_Verb_Coop","Lev_A_Mater_Coop","Lev_A_Verb_Confl","Lev_A_Mater_Confl",
                            "Lev_B_Verb_Coop","Lev_B_Mater_Coop","Lev_B_Verb_Confl","Lev_B_Mater_Confl",
                            "mi_A_Verb_Coop","mi_A_Mater_Coop","mi_A_Verb_Confl","mi_A_Mater_Confl",
                            "mi_B_Verb_Coop","mi_B_Mater_Coop","mi_B_Verb_Confl","mi_B_Mater_Confl")

write.dta(output.data, file=outfilename)
